from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain_ollama import OllamaLLM

# Invocation with streaming output
llm_stream = OllamaLLM(
    model="deepseek-r1:14b",
    callbacks=[StreamingStdOutCallbackHandler()],  # prints each token to stdout in real time
    num_ctx=4096,    # context window size (in tokens)
    num_gpu=99,      # layers to offload to the GPU; a large value such as 99 offloads all layers
    num_thread=8,    # CPU threads to use during generation
)

# Streaming call example: iterate over chunks as they are generated.
# Note: the stdout callback above already prints every token, so printing the
# chunks here as well duplicates the console output; use one mechanism or the other.
for chunk in llm_stream.stream("Please write a bubble sort function in JavaScript:"):
    print(chunk, end="", flush=True)
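
# Because the stdout callback and the manual .stream() loop both write to the
# console, it is usually cleaner to use only one mechanism per call. The sketch
# below keeps the two approaches separate (the llm_callback / llm_plain names are
# illustrative only) and assumes the same local deepseek-r1:14b model as above.
from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain_ollama import OllamaLLM

prompt = "Please write a bubble sort function in JavaScript:"

# Approach 1: attach the callback and call invoke(); the handler writes each
# token to stdout as it arrives, so nothing needs to be printed manually.
llm_callback = OllamaLLM(
    model="deepseek-r1:14b",
    callbacks=[StreamingStdOutCallbackHandler()],
)
llm_callback.invoke(prompt)

# Approach 2: no callback; consume the generator returned by .stream() yourself.
llm_plain = OllamaLLM(model="deepseek-r1:14b")
for chunk in llm_plain.stream(prompt):
    print(chunk, end="", flush=True)
print()  # final newline after the streamed answer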